------------------------------------------------------------------------------------------------------------------------------------
      name:  plog_735
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/clean_compile_march.log
  log type:  text
 opened on:  27 Nov 2024, 17:00:10

. ********************************************************************************
. 
. * clean_compile_march.do
. * Cleans and compiles 1979-2015 march cps data 
. * (For the most part) keeping person level records here
. 
. *modified: RY, 4/2/2018, edited to extend back to 1981. 
. *       Note: there are significant differences between the 1980 and 1981 march supplements. 
. *               prior to 1981 the march supplement is missing several variables, including
. *               everything after column 338 (the earnings variables, 
. *               parent present, spouse present.). Therefore we are only extending the 
. *               March CPS back to 1979.
. * 5/21/18, JR: Add topcoded total annual earnings, at 98th percentile.
. * 9/20/18, NR: edited to extend back to 1979 
. *        
. ********************************************************************************
. 
. cap project, doinfo

. if _rc==0 {
.          local pdir "`r(pdir)'"                                                     // the project's main dir.
.          local dofile "`r(dofile)'"                                                 // do-file's stub name
.    local sig {bind:{hi:[`dofile'.dta. RP : `dofile'.do, `c(current_date)']}}    // a signature in notes
.       local doasproject=1
. 
. }

. else {
.         local pdir "~/GRscarring"
.         local dofile "clean_compile_march"
.    local doasproject=0
. }

. 
. set more off

. local rootdir "`pdir'"

. local thisdir "`pdir'"

. set varabbrev off       // for long projects, it's best not to abbreviate

. 
. global nberdata "`pdir'/rawdata/rawfromNBER"

. global nbercode "`pdir'/programs/fromNBER"

. 
. local prepdata "`pdir'/scratch"

. local ipumsdata "`pdir'/rawdata/IPUMS"

. local data "`pdir'/rawdata"

. 
. if `doasproject'==1 {
.         project, original("`ipumsdata'/cps_ind_xwalk.dta")
project GRscar_erratum > do-file uses original: "/scratch/public/jr_ra/GRscarring2024/erratum/rawdata/IPUMS/cps_ind_xwalk.dta" files
> ig(3381164704:73428)
.         project, original("`ipumsdata'/cps_occ_xwalk.dta")
project GRscar_erratum > do-file uses original: "/scratch/public/jr_ra/GRscarring2024/erratum/rawdata/IPUMS/cps_occ_xwalk.dta" files
> ig(3690565970:184249)
.         project, uses("`prepdata'/cpi.dta")
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpi.dta" filesig(169829402:26459)
. }

. 
. ***************************************************************************************************************
. * Create some local varlists:
. 
. local ernvarsA "ern_yn ern_srce ern_otr ern_val wageotr wsal_yn wsal_val ws_val seotr semp_yn semp_val se_val frmotr frse_yn frse_
> val frm_val"

. local increcodes "pearnval ptotval pothval ptot_r"

. local incvarsA "uc_yn subuc strkuc uc_val"

. local incvarsB "wc_yn wc_type wc_val"

. local incvarsC "ss_yn ss_val "    

. local incvarsD "ssi_yn ssi_val"

. local incvarsE "paw_yn paw_typ paw_mon paw_val"

. local incvarsF "vet_yn vet_typ? vet_qva vet_val"

. local incvarsG "sur_yn sur_sc? sur_val? srvs_val"

. local incvarsH "dis_hp dis_cs dis_yn dis_sc? dis_val? dsab_val"

. local incvarsI "ret_yn ret_sc? ret_val? rtm_val"

. local incvarsJ           "int_yn int_val div_yn div_non div_val rnt_yn rnt_val"

. local incvarsJ2015 "int_yn int_val div_yn div_val rnt_yn rnt_val"

. local incvarsK "ed_yn oed_typ? ed_val"

. local incvarsL           "csp_yn csp_val alm_yn alm_val fin_yn fin_val oi_off oi_yn oi_val"

. local incvarsL2015 "csp_yn csp_val fin_yn fin_val oi_off oi_yn oi_val"

. local hivarsA "mcare mcaid champ hi_yn hiown"

. local hivarsB "hiemp hipaid emcontrb hi dephi"

. local hivarsC "paid hiout priv prityp depriv pout out oth otyp_? othstper"

. local hivarsD "othstyp? hea ihsflg ahiper ahityp? pchip cov_gh cov_hi ch_mc ch_hi"

. 
. local occ_ind_vars a_ind industry a_occ a_mjocc a_dtocc a_mjind a_dtind poccu2 occup weind wemind wemocg

. local occ_ind_vars_late industry peioind peioocc mjocc a_dtocc a_mjind a_dtind poccu2 occup weind wemind wemocg

. local occ_ind_vars_later industry peioind peioocc a_mjocc a_dtocc a_mjind a_dtind poccu2 occup weind wemind wemocg

. 
. 
. * Variables not in early (1989-?) March CPS files
. local laterlist resnss1 resnss2 resnssi1 resnssi2 ssikidyn p_mvcare p_mvcaid hityp hilin? pilin? care caid mon

. 
. 
. ***************************************************************************************************************
. 
. * Loop over years and keep relevant variables *
. forvalues yr=79/80 {
  2.         if `doasproject'==1 project, uses(`prepdata'/cpsmar`yr'.dta.gz)
  3.         ! zcat `prepdata'/cpsmar`yr'.dta.gz > `prepdata'/cpsmar`yr'.dta
  4.         use `prepdata'/cpsmar`yr'.dta
  5.         ! rm `prepdata'/cpsmar`yr'.dta
  6.         gen year=19`yr'
  7.         
.         *cleaning up the race variables
.         label values race race
  8.         label define race ///
>                         1       "White" ///
>                         2       "Black" ///
>                         3       "Other"
  9.         rename race a_race
 10.         gen prdtrace=.
 11.         label values ethnicit ethnicit 
 12.         label define ethnicit ///
>                         10      "Mexican American" ///
>                         11      "Chicano" ///
>                         12      "Mexican" ///
>                         13      "Mexicano" ///
>                         14      "Puerto Rican" ///
>                         15      "Cuban" ///
>                         16      "Central or South American" ///
>                         17      "Other Spanish" ///
>                         30      "Another Group Not Listed" ///
>                         39      "Don't Know" ///
>                         40      "Not Available"
 13.         gen pehspnon=.
 14.         replace pehspnon=ethnicit
 15.         
.         label values highgrad highgrad
 16.         label define highgrad ///
>                 0       "Children under 15" ///
>                 1       "None" ///
>                 2       "Elementary one" ///
>                 3       "Elementary two" ///
>                 4       "Elementary three" ///
>                 5       "Elementary four" ///
>                 6       "Elementary five" ///
>                 7       "Elementary six" ///
>                 8       "Elementary seven" ///
>                 9       "Elementary eight" ///
>                 10      "High School one" ///
>                 11      "High School two" ///
>                 12      "High School three" ///
>                 13      "High School four" ///
>                 14      "College one" ///
>                 15      "College two" ///
>                 16      "College three" ///
>                 17      "College four" ///
>                 18      "College five" ///
>                 19      "College six or more" 
 17.         
.         label values empst empst
 18.         label define empst  ///
>                 0       "NIU" ///
>                 1       "Full time" ///
>                 2       "Part time" ///
>                 3       "Unemployed experienced" ///
>                 4       "Unemployed not experienced" ///
>                 5       "Armed forces" ///
>                 6       "Not in labor force" 
 19.         
.         label values bfullpar bfullpar
 20.         label define bfullpar  ///
>                 0       "NIU" ///
>                 1       "Employed full time" ///
>                 2       "Part time for economic reasons" ///
>                 3       "unemployed full time" ///
>                 4       "employed part time" ///
>                 5       "unemployed part time"
 21.                 
.         *renaming the education variable
.         rename highgrad a_hga
 22.         *renaming the state variable
.         gen state=mststate
 23.         gen state_fips=.
 24.         
.         *renaming variables that are the same
.         ren marstat a_maritl
 25.         ren sex a_sex
 26.         ren a_hrs1 a_uslhrs
 27.         ren weind a_dtind
 28.         ren poccu2 a_dtocc
 29.         ren famrel a_famrel
 30.         ren Tenure h_tenure
 31.         ren marsuppw marsupwt
 32.         ren inern tcernval
 33.         ren intot tcwsval
 34. 
.         *generating variables that are not quite the same across years but we are equating here
.         gen pearnval = incearn
 35.         gen ptotval  = pinctot
 36.         gen ljcw     = a_clswkr
 37.         gen lkweeks  = I43WK    
 38.         gen nwlkwk   = I43WK
 39.         
.         *generating variablest that are missing from this earlier period
.         gen a_occ    = .
 40.         gen a_mjocc  = .        
 41.         gen a_ind    = .
 42.         gen a_mjind  = . 
 43.         gen a_werntf = .
 44.         
.         tempfile mar19`yr'
 45.         save `mar19`yr''
 46. }
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar79.dta.gz" filesig(3770398138:684
> 4312)


(154,593 missing values generated)
(154,593 missing values generated)
(154,593 real changes made)
(154,593 missing values generated)
(154,593 missing values generated)
(154,593 missing values generated)
(154,593 missing values generated)
(154,593 missing values generated)
(154,593 missing values generated)
file /tmp/St2868349.000004 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar80.dta.gz" filesig(480327720:8254
> 291)


(181,488 missing values generated)
(181,488 missing values generated)
(181,488 real changes made)
(181,488 missing values generated)
(181,488 missing values generated)
(181,488 missing values generated)
(181,488 missing values generated)
(181,488 missing values generated)
(181,488 missing values generated)
file /tmp/St2868349.000005 saved as .dta format

. 
. forvalues yr=81/87 {
  2.         if `doasproject'==1 project, uses(`prepdata'/cpsmar`yr'.dta.gz)
  3.         ! zcat `prepdata'/cpsmar`yr'.dta.gz > `prepdata'/cpsmar`yr'.dta
  4.         use `prepdata'/cpsmar`yr'.dta
  5.         ! rm `prepdata'/cpsmar`yr'.dta
  6.         gen year=19`yr'
  7.         
.         *cleaning up the race variables
.         label values race race
  8.         label define race ///
>                         1       "White" ///
>                         2       "Black" ///
>                         3       "Other"
  9.         rename race a_race
 10.         gen prdtrace=.
 11.         label values ethnicit ethnicit 
 12.         label define ethnicit ///
>                         10      "Mexican American" ///
>                         11      "Chicano" ///
>                         12      "Mexican" ///
>                         13      "Mexicano" ///
>                         14      "Puerto Rican" ///
>                         15      "Cuban" ///
>                         16      "Central or South American" ///
>                         17      "Other Spanish" ///
>                         30      "Another Group Not Listed" ///
>                         39      "Don't Know" ///
>                         40      "Not Available"
 13.         gen pehspnon=.
 14.         replace pehspnon=ethnicit
 15.         
.         label values highgrad highgrad
 16.         label define highgrad ///
>                 0       "Children under 15" ///
>                 1       "None" ///
>                 2       "Elementary one" ///
>                 3       "Elementary two" ///
>                 4       "Elementary three" ///
>                 5       "Elementary four" ///
>                 6       "Elementary five" ///
>                 7       "Elementary six" ///
>                 8       "Elementary seven" ///
>                 9       "Elementary eight" ///
>                 10      "High School one" ///
>                 11      "High School two" ///
>                 12      "High School three" ///
>                 13      "High School four" ///
>                 14      "College one" ///
>                 15      "College two" ///
>                 16      "College three" ///
>                 17      "College four" ///
>                 18      "College five" ///
>                 19      "College six or more" 
 17.         
.         label values empst empst
 18.         label define empst  ///
>                 0       "NIU" ///
>                 1       "Full time" ///
>                 2       "Part time" ///
>                 3       "Unemployed experienced" ///
>                 4       "Unemployed not experienced" ///
>                 5       "Armed forces" ///
>                 6       "Not in labor force" 
 19.         
.         label values bfullpar bfullpar
 20.         label define bfullpar  ///
>                 0       "NIU" ///
>                 1       "Employed full time" ///
>                 2       "Part time for economic reasons" ///
>                 3       "unemployed full time" ///
>                 4       "employed part time" ///
>                 5       "unemployed part time"
 21.                 
.         *renaming the education variable
.         rename highgrad a_hga
 22.         *renaming the state variable
.         gen state=mststate
 23.         gen state_fips=.
 24.         
.         *renaming variables that are the same
.         ren marstat a_maritl
 25.         ren sex a_sex
 26.         ren spouse a_spouse
 27.         ren earnhrtc a_herntf
 28.         ren earnhour a_hrspay
 29.         ren a_hrs1 a_uslhrs
 30.         ren weind a_dtind
 31.         ren poccu2 a_dtocc
 32.         ren famrel a_famrel
 33.         ren shlftpt a_ftpt
 34.         ren Tenure h_tenure
 35.         ren marsuppw marsupwt
 36.         ren flpinern tcernval
 37.         ren flpintot tcwsval
 38.         
.         *generating variables that are not quite the same across years but we are equating here
.         gen wkswork=I34WK
 39.         gen hrswk=I38 //we are equating number of hours worked to hours usually worked 
 40.         gen pearnval=incearn
 41.         gen ptotval=pinctot
 42.         gen ljcw=a_clswkr
 43.         gen nwlook=I43N
 44.         gen lkweeks=I43WK       
 45.         gen nwlkwk=I43WK
 46.         
.         *generating variablest that are missing from this earlier period
.         gen weclw=.
 47.         gen a_occ=.
 48.         gen a_mjocc=.   
 49.         gen a_ind=.
 50.         gen a_mjind=. 
 51.         gen a_werntf=.
 52.         
.         tempfile mar19`yr'
 53.         save `mar19`yr''
 54. }
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar81.dta.gz" filesig(1419963007:105
> 04060)


(181,358 missing values generated)
(181,358 missing values generated)
(181,358 real changes made)
(181,358 missing values generated)
(181,358 missing values generated)
(181,358 missing values generated)
(181,358 missing values generated)
(181,358 missing values generated)
(181,358 missing values generated)
(181,358 missing values generated)
file /tmp/St2868349.000006 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar82.dta.gz" filesig(289633617:9628
> 729)


(162,703 missing values generated)
(162,703 missing values generated)
(162,703 real changes made)
(162,703 missing values generated)
(162,703 missing values generated)
(162,703 missing values generated)
(162,703 missing values generated)
(162,703 missing values generated)
(162,703 missing values generated)
(162,703 missing values generated)
file /tmp/St2868349.000007 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar83.dta.gz" filesig(3604567529:971
> 2568)


(162,635 missing values generated)
(162,635 missing values generated)
(162,635 real changes made)
(162,635 missing values generated)
(162,635 missing values generated)
(162,635 missing values generated)
(162,635 missing values generated)
(162,635 missing values generated)
(162,635 missing values generated)
(162,635 missing values generated)
file /tmp/St2868349.000008 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar84.dta.gz" filesig(1496103377:949
> 7853)


(161,167 missing values generated)
(161,167 missing values generated)
(161,167 real changes made)
(161,167 missing values generated)
(161,167 missing values generated)
(161,167 missing values generated)
(161,167 missing values generated)
(161,167 missing values generated)
(161,167 missing values generated)
(161,167 missing values generated)
file /tmp/St2868349.000009 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar85.dta.gz" filesig(4084535925:979
> 7999)


(161,362 missing values generated)
(161,362 missing values generated)
(161,362 real changes made)
(161,362 missing values generated)
(161,362 missing values generated)
(161,362 missing values generated)
(161,362 missing values generated)
(161,362 missing values generated)
(161,362 missing values generated)
(161,362 missing values generated)
file /tmp/St2868349.00000a saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar86.dta.gz" filesig(375624952:9962
> 192)


(157,661 missing values generated)
(157,661 missing values generated)
(157,661 real changes made)
(157,661 missing values generated)
(157,661 missing values generated)
(157,661 missing values generated)
(157,661 missing values generated)
(157,661 missing values generated)
(157,661 missing values generated)
(157,661 missing values generated)
file /tmp/St2868349.00000b saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar87.dta.gz" filesig(2163959093:100
> 21069)


(155,468 missing values generated)
(155,468 missing values generated)
(155,468 real changes made)
(155,468 missing values generated)
(155,468 missing values generated)
(155,468 missing values generated)
(155,468 missing values generated)
(155,468 missing values generated)
(155,468 missing values generated)
(155,468 missing values generated)
file /tmp/St2868349.00000c saved as .dta format

. 
. forvalues yr=88/99 {
  2.         if `doasproject'==1 project, uses(`prepdata'/cpsmar`yr'.dta.gz)
  3.         ! zcat `prepdata'/cpsmar`yr'.dta.gz > `prepdata'/cpsmar`yr'.dta
  4.         use `prepdata'/cpsmar`yr'.dta
  5.         ! rm `prepdata'/cpsmar`yr'.dta
  6.         gen year=19`yr'
  7.         ** PROBLEMATIC VARS **  
.         foreach stvar in hg_st60 gestcen {
  8.                 cap confirm var `stvar'
  9.                 if !_rc ren `stvar' state
 10.         }
 11.         local addlist ""
 12.         foreach var of local laterlist {
 13.                 cap confirm var `var'
 14.                 if !_rc local addlist "`addlist' `var'"  
 15.         }
 16.         cap confirm var a_lineno
 17.         if !_rc ren a_lineno pulineno
 18.         cap confirm var h_idnum1
 19.         if !_rc egen hhid=concat(h_idnum1 h_idnum2)
 20.         else gen hhid=h_idnum
 21.         
.         * 1995 variable conventions change: 
.         if `yr'==95 {
 22.                 ren prmarsta a_maritl
 23.                 ren perace a_race
 24.                 ren peage a_age
 25.                 ren pespouse a_spouse
 26.                 ren peeduca a_hga
 27.                 ren pesex a_sex
 28.                 ren pthr a_herntf
 29.                 ren ptwk a_werntf
 30.                 ren prernhly a_hrspay
 31.                 ren prunedur a_wkslk
 32.                 ren pehrusl1 a_uslhrs
 33.                 ren pei01icd a_ind
 34.                 ren prmjind1 a_mjind 
 35.                 ren prdtind1 a_dtind
 36.                 ren pei01ocd a_occ
 37.                 ren prmjocc1 a_mjocc 
 38.                 ren prdtocc1 a_dtocc
 39.                 ren prfamrel a_famrel
 40.                 ren peschft a_ftpt
 41.         } 
 42.         local addlist2 ""
 43.         foreach var in a_race prdtrace pehspnon eit_cred pruntype fl_665 prwkstat agi a_whenlj pelklwo a_wantjb prwntjob  ///
>                                                                  a_mjind prmjind1 a_nlflj penlfjh a_wkstat prwkstat h_hhtype hrint
> sta prerelg {
 44.                 cap confirm var `var'
 45.                 if !_rc local addlist2 "`addlist2' `var'"  
 46.         }
 47. 
.         * Keep relevant vars:
.         keep state hhid pulineno a_spouse a_age age1 a_hga a_maritl a_sex p_stat a_famrel famrel hhdrel h_seq ///
>                          paw_typ a_herntf a_werntf pearnval a_hrspay mcaid mcare  a_wkslk workyn a_ftpt ///
>                          a_uslhrs hrswk a_lfsr a_rcow a_untype h_tenure ///
>                          subuc a_clswkr clwk weclw ljcw ///
>                          lknone nwlook nwlkwk lkweeks wtemp ///
>                          strkuc a_fnlwgt marsupwt wkswork wewkrs year `occ_ind_vars' ///
>                          `ernvarsA' `increcodes' `incvarsA' `incvarsB' `incvarsC' `incvarsD' `incvarsE' `incvarsF' ///
>                          `incvarsG' `incvarsH' `incvarsI' `incvarsJ' `incvarsK' `incvarsL' `hivarsA' `addlist' `addlist2'
 48.                          
.         tempfile mar19`yr'
 49.         save `mar19`yr''
 50. }
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar88.dta.gz" filesig(857480391:1501
> 2952)


file /tmp/St2868349.00000d saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar89.dta.gz" filesig(2021746206:142
> 25025)


file /tmp/St2868349.00000e saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar90.dta.gz" filesig(627552156:1535
> 2723)


file /tmp/St2868349.00000f saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar91.dta.gz" filesig(2758259830:156
> 78101)


file /tmp/St2868349.00000g saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar92.dta.gz" filesig(1576003955:174
> 15727)


file /tmp/St2868349.00000h saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar93.dta.gz" filesig(1544445148:173
> 56764)


file /tmp/St2868349.00000i saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar94.dta.gz" filesig(4076723799:171
> 32514)


file /tmp/St2868349.00000j saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar95.dta.gz" filesig(1576346206:173
> 24057)


file /tmp/St2868349.00000k saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar96.dta.gz" filesig(2961841427:158
> 59834)


file /tmp/St2868349.00000l saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar97.dta.gz" filesig(3903048452:160
> 89156)


file /tmp/St2868349.00000m saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar98.dta.gz" filesig(618006149:1583
> 1156)


file /tmp/St2868349.00000n saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar99.dta.gz" filesig(3205102665:163
> 76017)


file /tmp/St2868349.00000o saved as .dta format

. 
. 
. forvalues yr=0/12 {
  2.         if `yr'<10 local yr "0`yr'"
  3.         if `doasproject'==1 project, uses(`prepdata'/cpsmar`yr'.dta.gz)
  4.         ! zcat `prepdata'/cpsmar`yr'.dta.gz > `prepdata'/cpsmar`yr'.dta
  5.         use `prepdata'/cpsmar`yr'.dta
  6.         ! rm `prepdata'/cpsmar`yr'.dta
  7.         gen year=20`yr'
  8.         ** PROBLEMATIC VARS **  
.         foreach stvar in hg_st60 gestcen {
  9.                 cap confirm var `stvar'
 10.                 if !_rc ren `stvar' state
 11.         }
 12.         local addlist ""
 13.         foreach var of local laterlist {
 14.                 cap confirm var `var'
 15.                 if !_rc local addlist "`addlist' `var'"  
 16.         }
 17.         
.         cap confirm var a_lineno
 18.         if !_rc ren a_lineno pulineno
 19.         cap confirm var h_idnum1
 20.         if !_rc egen hhid=concat(h_idnum1 h_idnum2)
 21.         else gen hhid=h_idnum
 22.         local addlist2 ""
 23.         foreach var in a_race prdtrace pehspnon eit_cred pruntype fl_665 a_wkstat prwkstat agi prerelg {
 24.                 cap confirm var `var'
 25.                 if !_rc local addlist2 "`addlist2' `var'"  
 26.         }
 27. 
.         * Keep relevant vars:
.         if `yr'<03 {
 28.                 keep state hhid pulineno a_spouse a_age age1 a_hga a_maritl a_sex p_stat a_famrel famrel hhdrel h_seq ///
>                                  paw_typ a_herntf a_werntf pearnval a_hrspay mcaid mcare  a_wkslk workyn a_ftpt ///
>                                  a_uslhrs hrswk a_lfsr a_untype h_tenure ///
>                                  subuc a_clswkr clwk weclw ljcw ///
>                                  lknone nwlook nwlkwk lkweeks wtemp ///
>                                  strkuc a_fnlwgt marsupwt wkswork wewkrs year `occ_ind_vars' ///
>                                  `ernvarsA' `increcodes' `incvarsA' `incvarsB' `incvarsC' `incvarsD' `incvarsE' `incvarsF' ///
>                                  `incvarsG' `incvarsH' `incvarsI' `incvarsJ' `incvarsK' `incvarsL' `hivarsA' `addlist' `addlist2'
 29.          }
 30.          else if `yr'>=03 & `yr'<11 {
 31.                 keep state hhid pulineno a_spouse a_age age1 a_hga a_maritl a_sex p_stat a_famrel famrel hhdrel h_seq ///
>                                  paw_typ a_herntf a_werntf pearnval a_hrspay mcaid mcare  a_wkslk workyn a_ftpt ///
>                                  a_uslhrs hrswk a_lfsr a_untype h_tenure ///
>                                  subuc a_clswkr clwk weclw ljcw ///
>                                  lknone nwlook nwlkwk lkweeks wtemp ///
>                                  strkuc a_fnlwgt marsupwt wkswork wewkrs year `occ_ind_vars_late' a_famrel famrel hhdrel ///
>                                  `ernvarsA' `increcodes' `incvarsA' `incvarsB' `incvarsC' `incvarsD' `incvarsE' `incvarsF' ///
>                                  `incvarsG' `incvarsH' `incvarsI' `incvarsJ' `incvarsK' `incvarsL' `hivarsA' `addlist' `addlist2'
 32.          }
 33.          else {
 34.                         keep state hhid pulineno a_spouse a_age age1 a_hga a_maritl a_sex p_stat a_famrel famrel hhdrel h_seq /
> //
>                          paw_typ a_herntf a_werntf pearnval a_hrspay mcaid mcare  a_wkslk workyn a_ftpt ///
>                          a_uslhrs hrswk a_lfsr a_untype h_tenure ///
>                          subuc a_clswkr clwk weclw ljcw ///
>                          lknone nwlook nwlkwk lkweeks wtemp ///
>                          strkuc a_fnlwgt marsupwt wkswork wewkrs year `occ_ind_vars_later' ///
>                          `ernvarsA' `increcodes' `incvarsA' `incvarsB' `incvarsC' `incvarsD' `incvarsE' `incvarsF' ///
>                          `incvarsG' `incvarsH' `incvarsI' `incvarsJ' `incvarsK' `incvarsL' `hivarsA' `addlist' `addlist2'
 35.          }
 36. 
.         tempfile mar20`yr'
 37.         save `mar20`yr''
 38. }               
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar00.dta.gz" filesig(1887375881:168
> 90580)


file /tmp/St2868349.00000p saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar01.dta.gz" filesig(2507884735:166
> 66862)


file /tmp/St2868349.00000q saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar02.dta.gz" filesig(1623457081:272
> 05455)


file /tmp/St2868349.00000r saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar03.dta.gz" filesig(2373609884:277
> 53306)


file /tmp/St2868349.00000s saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar04.dta.gz" filesig(118545913:2417
> 4487)


file /tmp/St2868349.00000t saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar05.dta.gz" filesig(4092946984:280
> 36336)


file /tmp/St2868349.00000u saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar06.dta.gz" filesig(364749653:2735
> 4306)


file /tmp/St2868349.00000v saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar07.dta.gz" filesig(2397250954:269
> 04234)


file /tmp/St2868349.00000w saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar08.dta.gz" filesig(319512975:2681
> 3456)


file /tmp/St2868349.00000x saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar09.dta.gz" filesig(3136557014:275
> 25628)


file /tmp/St2868349.000010 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar10.dta.gz" filesig(3433893522:274
> 83289)


file /tmp/St2868349.000011 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar11.dta.gz" filesig(1486499947:281
> 53168)


file /tmp/St2868349.000012 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar12.dta.gz" filesig(239245682:2761
> 1664)


file /tmp/St2868349.000013 saved as .dta format

. 
. forvalues yr=2013/2018 {
  2.         if `yr'==2014 {
  3.                 if `doasproject'==1 project, uses(`prepdata'/cpsmar`yr't.dta.gz)
  4.                 ! zcat `prepdata'/cpsmar`yr't.dta.gz > `prepdata'/cpsmar`yr'.dta
  5.         }
  6.         else {
  7.                 if `doasproject'==1 project, uses(`prepdata'/cpsmar`yr'.dta.gz)
  8.                 ! zcat `prepdata'/cpsmar`yr'.dta.gz > `prepdata'/cpsmar`yr'.dta
  9.         }
 10.         use `prepdata'/cpsmar`yr'.dta
 11.         ! rm `prepdata'/cpsmar`yr'.dta
 12.         gen year=`yr'
 13.         ** PROBLEMATIC VARS **  
.         /*
>         foreach stvar in hg_st60 gestcen {
>                 cap confirm var `stvar'
>                 if !_rc ren `stvar' state
>         }
>         */
.         ren gestfips state_fips
 14.         
.         local addlist ""
 15.         foreach var of local laterlist {
 16.                 cap confirm var `var'
 17.                 if !_rc local addlist "`addlist' `var'"  
 18.         }
 19.         
.         cap confirm var a_lineno
 20.         if !_rc ren a_lineno pulineno
 21.         cap confirm var h_idnum1
 22.         if !_rc egen hhid=concat(h_idnum1 h_idnum2)
 23.         else gen hhid=h_idnum
 24.         
.         local addlist2 ""
 25.         foreach var in a_race prdtrace pehspnon eit_cred pruntype fl_665 a_wkstat prwkstat agi prerelg {
 26.                 cap confirm var `var'
 27.                 if !_rc local addlist2 "`addlist2' `var'"  
 28.         }
 29.         
.         * Keep relevant vars:
.         if `yr'<2015 {
 30.                 keep state_fips hhid pulineno a_spouse a_age age1 a_hga a_maritl a_sex p_stat a_famrel famrel hhdrel h_seq ///
>                          paw_typ a_herntf a_werntf pearnval a_hrspay mcaid mcare a_wkslk workyn a_ftpt ///
>                          a_uslhrs hrswk a_lfsr a_untype h_tenure ///
>                          subuc a_clswkr clwk weclw ljcw ///
>                          lknone nwlook nwlkwk lkweeks wtemp ///
>                          strkuc a_fnlwgt marsupwt wkswork wewkrs year `occ_ind_vars_later' ///
>                          `ernvarsA' `increcodes' `incvarsA' `incvarsB' `incvarsC' `incvarsD' `incvarsE' `incvarsF' ///
>                          `incvarsG' `incvarsH' `incvarsI' `incvarsJ' `incvarsK' `incvarsL' `hivarsA' `addlist' `addlist2'
 31.         
.         }
 32.         else if inlist(`yr',2015,2016,2017,2018) {
 33.                 keep state_fips hhid pulineno a_spouse a_age age1 a_hga a_maritl a_sex pehspnon p_stat a_famrel famrel hhdrel h
> _seq ///
>                          paw_typ a_herntf a_werntf pearnval a_hrspay mcaid mcare a_wkslk workyn a_ftpt ///
>                          a_uslhrs hrswk a_lfsr a_untype h_tenure ///
>                          subuc a_clswkr clwk weclw ljcw ///
>                          lknone nwlook nwlkwk lkweeks wtemp ///
>                          strkuc a_fnlwgt marsupwt wkswork wewkrs year `occ_ind_vars_later' ///
>                          `ernvarsA' `increcodes' `incvarsA' `incvarsB' `incvarsC' `incvarsD' `incvarsE' `incvarsF' ///
>                          `incvarsG' `incvarsH' `incvarsI' `incvarsJ2015' `incvarsK' `incvarsL2015' `hivarsA' `addlist'  `addlist2'
>               
 34.         }
 35.                          
.         tempfile mar`yr'
 36.         save `mar`yr''           
 37. }       
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar2013.dta.gz" filesig(927801735:28
> 008615)


file /tmp/St2868349.000014 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar2014t.dta.gz" filesig(3735546527:
> 19544863)


file /tmp/St2868349.000015 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar2015.dta.gz" filesig(2050352800:2
> 7777197)


file /tmp/St2868349.000016 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar2016.dta.gz" filesig(2089874264:2
> 5535861)


file /tmp/St2868349.000017 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar2017.dta.gz" filesig(1327521682:2
> 5752967)


file /tmp/St2868349.000018 saved as .dta format
project GRscar_erratum > do-file uses: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/cpsmar2018.dta.gz" filesig(95231840:249
> 64029)


file /tmp/St2868349.000019 saved as .dta format

. 
. ***************************************************************************************************************
. 
. ** APPEND FILES TOGETHER **
. 
. use `mar1979'

. forvalues y=1979/2018 {
  2.         qui append using `mar`y''
  3. }

. 
. 
. ***************************************************************************************************************
. 
. **********************
. ** DO SOME CLEANING **
. **********************
. 
. *** Consistent coding of variables that change over time (most problems in 1995): *** 
. 
. ** Race variables - coding changes 2002-2003      
. 
. gen byte r_white=(a_race==1) if year<2003
(3,178,580 missing values generated)

. gen byte r_black=(a_race==2) if year<2003
(3,178,580 missing values generated)

. gen byte r_asian=(a_race==3) if year<2003
(3,178,580 missing values generated)

. gen byte r_amind=(a_race==4) if year<2003
(3,178,580 missing values generated)

. gen byte r_other=(a_race==5) if year<2003
(3,178,580 missing values generated)

. 
. replace r_black=(inlist(prdtrace, 2, 6, 10, 11, 12, 15, 16, 19)) if year>=2003
(3,178,580 real changes made)

. replace r_asian=(inlist(prdtrace, 4, 5, 8, 11, 13, 14, 16, 17, 18, 19)) if year>=2003
(3,178,580 real changes made)

. replace r_amind=(inlist(prdtrace, 3, 7, 10, 13, 15, 17, 19)) if year>=2003
(3,178,580 real changes made)

. replace r_white=(inlist(prdtrace, 1, 6, 7, 8, 9, 15, 16, 17, 18, 19)) if year>=2003  
(3,178,580 real changes made)

. replace r_other=(inlist(prdtrace, 20, 21, 22, 23, 24, 25, 26)) if year>=2003
(3,178,580 real changes made)

. 
. gen byte r_hispan=(pehspnon==1) if year>=2003
(3,867,850 missing values generated)

. replace r_hispan=1 if pehspnon>=10 | pehspnon<=17 
(6,487,943 real changes made)

. gen byte r_hispan_miss=(r_hispan==.)

. replace r_hispan=0 if r_hispan==.
(0 real changes made)

. 
. drop prdtrace a_race pehspnon ethnicit

. 
. ** Education vars - coding changes from 1991-1992
. 
. gen byte ed_lths=(a_hga>0 & a_hga<12) if a_hga<. & year<=1991
(4,796,179 missing values generated)

. gen byte ed_hs=(a_hga==12) if a_hga<. & year<=1991
(4,796,179 missing values generated)

. gen byte ed_scol=inlist(a_hga, 13,14,15) if a_hga<. & year<=1991
(4,796,179 missing values generated)

. gen byte ed_ba=(a_hga==16) if a_hga<. & year<=1991
(4,796,179 missing values generated)

. gen byte ed_grad=(a_hga>16) if a_hga<. & year<=1991 
(4,796,179 missing values generated)

. 
. replace ed_lths=inlist(a_hga, 31, 32, 33, 34, 35, 36, 37, 38) if a_hga<. & year>1991
(4,796,179 real changes made)

. replace ed_hs=inlist(a_hga, 39) if a_hga<. & year>1991
(4,796,179 real changes made)

. replace ed_scol=inlist(a_hga, 40, 41, 42) if a_hga<. & year>1991
(4,796,179 real changes made)

. replace ed_ba=inlist(a_hga, 43) if a_hga<. & year>1991
(4,796,179 real changes made)

. replace ed_grad=inlist(a_hga, 44, 45, 46) if a_hga<. & year>1991
(4,796,179 real changes made)

. *drop a_hga
. 
. ** Self employment and SE income 
. * SEMP-YN - any own business (all yrs) - recode (not sure if that's what we want...)
. * SEMP-VAL - total earnings, own business SE (all yrs)
. 
. 
. ** Not that important for now, but here are some vars w/ coding changes: 
. * prwkstat a_wkstat 
. * a_whenlj pelklwo 
. * a_wantjb prwntjob 
. * a_nlflj penlfjh 
. * a_mjind prmjind1 
. * h_hhtype hrintsta
. 
. * RACE: (a_race until 2002, prdtrace afterwards)
. 
. * a_race prdtrace pehspnon
. 
. * Fix 2013-2015 state codes (using FIPS - convert to 1960 census codes since all other data in that format)
. 
. replace state=11 if state_fips==23 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(10,836 real changes made)

. replace state=12 if state_fips==33 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,724 real changes made)

. replace state=13 if state_fips==50 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(12,437 real changes made)

. replace state=14 if state_fips==25 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(18,781 real changes made)

. replace state=15 if state_fips==44 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(11,901 real changes made)

. replace state=16 if state_fips==9  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,521 real changes made)

. replace state=21 if state_fips==36 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(44,205 real changes made)

. replace state=22 if state_fips==34 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(21,899 real changes made)

. replace state=23 if state_fips==42 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(29,259 real changes made)

. replace state=31 if state_fips==39 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(27,074 real changes made)

. replace state=32 if state_fips==18 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(17,111 real changes made)

. replace state=33 if state_fips==17 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(32,013 real changes made)

. replace state=34 if state_fips==26 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(23,112 real changes made)

. replace state=35 if state_fips==55 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(16,259 real changes made)

. replace state=41 if state_fips==27 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(18,012 real changes made)

. replace state=42 if state_fips==19 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(14,746 real changes made)

. replace state=43 if state_fips==29 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,205 real changes made)

. replace state=44 if state_fips==38 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(13,744 real changes made)

. replace state=45 if state_fips==46 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(12,742 real changes made)

. replace state=46 if state_fips==31 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(14,305 real changes made)

. replace state=47 if state_fips==20 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(14,059 real changes made)

. replace state=51 if state_fips==10 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(13,581 real changes made)

. replace state=52 if state_fips==24 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(18,631 real changes made)

. replace state=53 if state_fips==11 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(16,541 real changes made)

. replace state=54 if state_fips==51 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(21,566 real changes made)

. replace state=55 if state_fips==54 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(16,991 real changes made)

. replace state=56 if state_fips==37 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(23,656 real changes made)

. replace state=57 if state_fips==45 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,764 real changes made)

. replace state=58 if state_fips==13 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(24,497 real changes made)

. replace state=59 if state_fips==12 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(47,550 real changes made)

. replace state=61 if state_fips==21 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(13,357 real changes made)

. replace state=62 if state_fips==47 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(17,195 real changes made)

. replace state=63 if state_fips==1  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(17,751 real changes made)

. replace state=64 if state_fips==28 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(16,705 real changes made)

. replace state=71 if state_fips==5  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(16,742 real changes made)

. replace state=72 if state_fips==22 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(20,172 real changes made)

. replace state=73 if state_fips==40 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,716 real changes made)

. replace state=74 if state_fips==48 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(69,478 real changes made)

. replace state=81 if state_fips==30 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,919 real changes made)

. replace state=82 if state_fips==16 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,347 real changes made)

. replace state=83 if state_fips==56 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(14,285 real changes made)

. replace state=84 if state_fips==8  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(17,619 real changes made)

. replace state=85 if state_fips==35 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(18,302 real changes made)

. replace state=86 if state_fips==4  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(17,018 real changes made)

. replace state=87 if state_fips==49 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(17,086 real changes made)

. replace state=88 if state_fips==32 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,521 real changes made)

. replace state=91 if state_fips==53 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(19,490 real changes made)

. replace state=92 if state_fips==41 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(15,353 real changes made)

. replace state=93 if state_fips==6  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(110,131 real changes made)

. replace state=94 if state_fips==2  & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(13,206 real changes made)

. replace state=95 if state_fips==15 & inlist(year, 2013, 2014, 2015, 2016, 2017, 2018)
(18,443 real changes made)

. 
. drop state_fips

. 
. #delimit ;
delimiter now ;
. label values state state;

. label define state
>         11      "Maine"
>         12      "New Hampshire"
>         13      "Vermont"
>         14      "Massachusetts"
>         15      "Rhode Island"
>         16      "Connecticut"
>         21      "New York"
>         22      "New Jersey"
>         23      "Pennsylvania"
>         31      "Ohio"
>         32      "Indiana"
>         33      "Illinois"
>         34      "Michigan"
>         35      "Wisconsin"
>         41      "Minnesota"
>         42      "Iowa"
>         43          "Missouri"                      
>         44          "North Dakota"                  
>         45          "South Dakota"                  
>         46          "Nebraska"                      
>         47          "Kansas"                        
>         51          "Delaware"                      
>         52          "Maryland"                      
>         53          "District of Columbia"          
>         54          "Virginia"                      
>         55          "West Virginia"                 
>         56          "North Carolina"                
>         57          "South Carolina"                
>         58          "Georgia"                       
>         59          "Florida"                       
>         61          "Kentucky"                      
>         62          "Tennessee"                     
>         63          "Alabama"                       
>         64          "Mississippi"                   
>         71          "Arkansas"                      
>         72          "Louisiana"                     
>         73          "Oklahoma"                      
>         74          "Texas"                         
>         81          "Montana"                       
>         82          "Idaho"                         
>         83          "Wyoming"                       
>         84          "Colorado"                      
>         85          "New Mexico"                    
>         86          "Arizona"                       
>         87          "Utah"                          
>         88          "Nevada"                        
>         91          "Washington"                    
>         92          "Oregon"                        
>         93          "California"                    
>         94          "Alaska"                        
>         95          "Hawaii"
>         98          "Overseas"                        
> ;

. #delimit cr
delimiter now cr
. 
. * Industry and occupation codes: (for now, do "last year" questions, although also "last week" ones) *
. replace industry=ind if year>1987 
(5,413,402 real changes made, 5,413,402 to missing)

. replace occup=occ if year>1987 
(5,413,402 real changes made, 5,413,402 to missing)

. 
. ***** AGE COHORTS *****
. 
. * Birth year:
. gen byear=year-age
(5,413,402 missing values generated)

. 
. * 4-year birth cohorts (1977-80, 81-84, 85-88, 89-92)
. gen byear_grp="77-80" if byear>=1977 & byear<=1980
(6,950,000 missing values generated)

. replace byear_grp="81-84" if byear>=1981 & byear<=1984
(57,061 real changes made)

. replace byear_grp="85-88" if byear>=1985 & byear<=1988
(14,821 real changes made)

. replace byear_grp="89-92" if byear>=1989 & byear<=1992
(0 real changes made)

. 
. 
. **** EDUCATION ****
. 
. gen educ=.
(7,046,430 missing values generated)

. replace educ=1 if ed_lths==1
(1,184,598 real changes made)

. replace educ=2 if ed_hs==1
(1,322,610 real changes made)

. replace educ=3 if ed_scol==1
(1,669,984 real changes made)

. replace educ=4 if ed_ba==1
(668,983 real changes made)

. replace educ=5 if ed_grad==1
(535,409 real changes made)

. 
. merge m:1 year ind using "`ipumsdata'/cps_ind_xwalk.dta" 

    Result                      Number of obs
    -----------------------------------------
    Not matched                     7,053,165
        from master                 7,046,430  (_merge==1)
        from using                      6,735  (_merge==2)

    Matched                                 0  (_merge==3)
    -----------------------------------------

. tab _merge /* only 9 ppl didn't match - not sure why, small so not going to worry */

   Matching result from |
                  merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
        Master only (1) |  7,046,430       99.90       99.90
         Using only (2) |      6,735        0.10      100.00
------------------------+-----------------------------------
                  Total |  7,053,165      100.00

. drop _merge

. merge m:1 year occ using "`ipumsdata'/cps_occ_xwalk.dta" 

    Result                      Number of obs
    -----------------------------------------
    Not matched                     7,066,569
        from master                 7,053,165  (_merge==1)
        from using                     13,404  (_merge==2)

    Matched                                 0  (_merge==3)
    -----------------------------------------

. tab _merge /* again, looks good, w/ 138/4.7 million that don't match */

   Matching result from |
                  merge |      Freq.     Percent        Cum.
------------------------+-----------------------------------
        Master only (1) |  7,053,165       99.81       99.81
         Using only (2) |     13,404        0.19      100.00
------------------------+-----------------------------------
                  Total |  7,066,569      100.00

. drop _merge

. 
. tempfile all

. save `all'
file /tmp/St2868349.00001a saved as .dta format

. 
. ** Convert income vars to real income (2015$)
.  use"`prepdata'/cpi.dta", clear

.  keep if month==3
(803 observations deleted)

.  keep if inrange(year,1979,2018)
(33 observations deleted)

.  keep year monthly

.  rename monthly cpi

.  label var cpi "CPI (Annual avg)"

.  su cpi if year==2015, meanonly

.  local cpi2015=r(mean)

.  merge 1:m year using `all', assert(3)
(variable year was int, now double to accommodate using data's values)

    Result                      Number of obs
    -----------------------------------------
    Not matched                             0
    Matched                         7,066,569  (_merge==3)
    -----------------------------------------

.  drop _merge

.  foreach v of varlist incearn ern_val ws_val wsal_val se_val semp_val frm_val frse_val ///
>         uc_val wc_val ss_val ssi_val paw_val vet_val sur_val1 ///
>         sur_val2 srvs_val dis_val1 dis_val2 dsab_val ret_val1 ///
>         ret_val2 rtm_val int_val div_val rnt_val ed_val csp_val ///
>         alm_val fin_val oi_val ptotval pearnval pothval p_mvcare ///
>         p_mvcaid eit_cred agi {
  2.                    gen `v'_r=`v'*`cpi2015'/cpi
  3.                    local label : var label `v'
  4.                    label var `v'_r "`label' (2015$)"
  5. }
(5,433,541 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(510,813 missing values generated)
(1,653,167 missing values generated)
(510,813 missing values generated)
(1,497,699 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(510,813 missing values generated)
(510,813 missing values generated)
(510,813 missing values generated)
(1,497,699 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(666,281 missing values generated)
(510,813 missing values generated)
(1,497,699 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(2,403,676 missing values generated)
(1,653,167 missing values generated)
(1,653,167 missing values generated)
(20,139 missing values generated)
(20,139 missing values generated)
(1,653,167 missing values generated)
(3,020,899 missing values generated)
(3,020,899 missing values generated)
(2,270,390 missing values generated)
(2,270,390 missing values generated)

. *Make a top-coded total earnings that is censored at the 98th percentile by year
.  gen pearnval_tc_r=pearnval_r
(20,139 missing values generated)

.  levelsof year, local(yrlist)
1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 20
> 05 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018

.  foreach y of local yrlist {
  2.    _pctile pearnval_r [aw=marsupwt] if year==`y', percentiles(98)
  3.    replace pearnval_tc_r=r(r1) if year==`y' & pearnval_r>r(r1) & pearnval_r<.
  4.  }
(5,110 real changes made)
(3,362 real changes made)
(3,246 real changes made)
(3,020 real changes made)
(3,013 real changes made)
(3,005 real changes made)
(3,069 real changes made)
(2,511 real changes made)
(2,950 real changes made)
(2,945 real changes made)
(2,641 real changes made)
(2,871 real changes made)
(2,958 real changes made)
(2,883 real changes made)
(2,860 real changes made)
(2,689 real changes made)
(2,655 real changes made)
(2,374 real changes made)
(2,471 real changes made)
(2,505 real changes made)
(2,500 real changes made)
(2,433 real changes made)
(2,010 real changes made)
(3,790 real changes made)
(4,049 real changes made)
(4,076 real changes made)
(4,010 real changes made)
(3,974 real changes made)
(3,773 real changes made)
(3,894 real changes made)
(3,957 real changes made)
(4,025 real changes made)
(3,830 real changes made)
(3,863 real changes made)
(4,040 real changes made)
(2,758 real changes made)
(3,816 real changes made)
(3,051 real changes made)
(3,276 real changes made)
(3,453 real changes made)

.  
. * A few more small things
. replace age=a_age if year>1987
(5,413,402 real changes made)

. drop a_age

. ren age1 age_bins

. 
. 
. /********* CODE MAJOR OCCUPATION CATEGORIES ******
> 
> * Labels from IPUMS (also correspond to major categories in Census 2010 Occ Codes Xwalk)
> 
> qui gen occ_major=""
> label var occ_major "2010 Census Occupation: Major"
> qui replace occ_major="Management in Business, Science, and Arts"       if occ2010>=10 & occ2010<=430
> qui replace occ_major="Business Operations Specialists"                 if occ2010>=500 & occ2010<=730
> qui replace occ_major="Financial Specialists"                           if occ2010>=800 & occ2010<=950
> qui replace occ_major="Computer and Mathematical"                       if occ2010>=1000 & occ2010<=1240
> qui replace occ_major="Architecture and Engineering"                    if occ2010>=1300 & occ2010<=1540
> qui replace occ_major="Technicians"                                     if occ2010>=1550 & occ2010<=1560  
> qui replace occ_major="Life, Physical, and Social Science:"             if occ2010>=1600 & occ2010<=1980 
> qui replace occ_major="Community and Social Services"                   if occ2010>=2000 & occ2010<=2060  
> qui replace occ_major="Legal"                                           if occ2010>=2100 & occ2010<=2150  
> qui replace occ_major="Education, Training, and Library"                if occ2010>=2200 & occ2010<=2550  
> qui replace occ_major="Arts, Design, Entertainment, Sports, and Media"  if occ2010>=2600 & occ2010<=2920  
> qui replace occ_major="Healthcare Practitioners and Technicians"        if occ2010>=3000 & occ2010<=3540  
> qui replace occ_major="Healthcare Support"                              if occ2010>=3600 & occ2010<=3650 
> qui replace occ_major="Protective Service"                              if occ2010>=3700 & occ2010<=3950  
> qui replace occ_major="Food Preparation and Serving"                    if occ2010>=4000 & occ2010<=4150  
> qui replace occ_major="Building and Grounds Cleaning and Maintenance"   if occ2010>=4200 & occ2010<=4250  
> qui replace occ_major="Personal Care and Service"                       if occ2010>=4300 & occ2010<=4650 
> qui replace occ_major="Sales and Related"                               if occ2010>=4700 & occ2010<=4965 
> qui replace occ_major="Office and Administrative Support"               if occ2010>=5000 & occ2010<=5940  
> qui replace occ_major="Farming, Fisheries, and Forestry"                if occ2010>=6005 & occ2010<=6130  
> qui replace occ_major="Construction"                                    if occ2010>=6200 & occ2010<=6765  
> qui replace occ_major="Extraction"                                      if occ2010>=6800 & occ2010<=6940  
> qui replace occ_major="Installation, Maintenance, and Repair"           if occ2010>=7000 & occ2010<=7630  
> qui replace occ_major="Production"                                      if occ2010>=7700 & occ2010<=8965  
> qui replace occ_major="Transportation and Material Moving"              if occ2010>=9000 & occ2010<=9750  
> qui replace occ_major="Military"                                        if occ2010>=9800 & occ2010<=9830
> qui replace occ_major="No Occupation"                                   if occ2010==9920 
> */
. 
. ** SAVE FILE **
. 
. compress
  variable industry was int now byte
  variable occup was int now byte
  variable year was double now int
  variable nwlook was double now byte
  variable wkswork was double now byte
  variable hrswk was double now byte
  variable weclw was double now byte
  variable state was double now byte
  variable pearnval was double now long
  variable ptotval was double now long
  variable ljcw was double now byte
  variable lkweeks was double now byte
  variable nwlkwk was double now byte
  variable a_occ was double now int
  variable a_mjocc was double now byte
  variable a_ind was double now int
  variable a_mjind was double now byte
  variable a_werntf was double now byte
  variable byear was double now int
  variable educ was double now byte
  (833,855,142 bytes saved)

. save `prepdata'/`dofile'.dta, replace
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/clean_compile_march.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/clean_compile_march.dta saved

. ! gzip -f `prepdata'/`dofile'.dta


. 
. if `doasproject'==1 project, creates(`prepdata'/`dofile'.dta.gz)
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/clean_compile_march.dta.gz" filesig(
> 2430876695:937693911)

. 
. 
. * end of do file *
. 
. 
end of do-file
      name:  plog_735
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/clean_compile_march.log
  log type:  text
 closed on:  27 Nov 2024, 17:13:15
------------------------------------------------------------------------------------------------------------------------------------
